/*****************************************************************************************************************************************
******Code for Currie, Mueller-Smith, Rossin-Slater "Violence while in Utero: The Impacts of Assaults During Pregnancy on Birth Outcomes"*
******July 2020***********************************************************************************************************************/

*****This code cleans the merged NYC crimes/births data and conducts analyses to produce the following output in the manuscript:

*** Appendix Table A10, Panel B
*** Text Appendix E about re-weighting multi-family sample to be more representative of single-family sample

* ---- Session setup ----
set more off
set matsize 10000

clear all

cap log close

***** Paths to data set locations, results graphs *****
* Fill these in before running. Each global is prepended directly to a file
* name (e.g. ${bulk}births_crimes.dta), so it must end with a path separator.
global bulk 
global home 
global results 
global graphs


**** first, run clean_birthoutcomes.do file to clean the births data, then the mergebirths_tocrime.do file to merge births data to crime data

set scheme s1color

* The file name is quoted so the load still works when $bulk contains spaces;
* this matches the quoted "${results}..." paths used by the esttab calls below.
use "${bulk}births_crimes.dta", clear

* Keep conceptions from 2004 through 2012 (both endpoints included).
keep if inrange(concep_year, 2004, 2012)

* Sample flag. Despite the name, multifam is also switched on for house==1
* records, so single-family homes stay in the data at this stage.
gen multifam = (house==1 | bldgclass=="B" | bldgclass=="C")

* Class "C" buildings with more than three floors are taken back out.
* NOTE(review): if numfloors is numeric, a missing value compares as greater
* than 3, so class "C" records with missing numfloors are dropped as well;
* confirm that this is intended.
cap destring numfloors, replace
replace multifam = 0 if numfloors>3 & bldgclass=="C"


tab multifam
keep if multifam==1


count

global crimeclass felony misdemeanor violation

global crimetypes arson assault f_assault burglary drugs kidnap fraud larceny harassment homicide dui weapons other


**** NOTE: the total number of crimes during pregnancy is calculated in the mergebirths_tocrime.do file

* Here: total crimes in the 10 months after pregnancy (post10-post19) and in
* the 10 months before pregnancy (pre1-pre10), for all crimes combined, for
* each crime class, and for each crime type.
foreach stem in crimes $crimeclass $crimetypes {

    * monthly "post" counts, months 10 through 19
    local after
    forvalues m = 10/19 {
        local after `after' tot`stem'_post`m'
    }

    * monthly "pre" counts, months 10 down to 1
    local before
    forvalues m = 10(-1)1 {
        local before `before' tot`stem'_pre`m'
    }

    egen tot`stem'_post = rowtotal(`after')
    egen tot`stem'_pre = rowtotal(`before')
}


* 0/1 indicator for at least one crime in each window (during pregnancy,
* 10 months after, 10 months before); left missing when the count is missing.
foreach stem in crimes $crimeclass $crimetypes {
    foreach window in preg post pre {
        gen any`stem'_`window' = tot`stem'_`window'>0 if tot`stem'_`window'<.
    }
}

* Summary statistics for assaults: indicators first, then counts.
foreach prefix in any tot {
    foreach window in preg post pre {
        sum `prefix'assault_`window'
    }
}

**** crime/assault rates by months relative to conception

* Month-by-month 0/1 indicators for pre-months 1-12 ...
forvalues m = 1/12 {
    foreach stem in crimes $crimeclass $crimetypes {
        gen any`stem'_pre`m' = tot`stem'_pre`m'>0 if tot`stem'_pre`m'<.
    }
}

* ... and for post-months 0-45.
forvalues m = 0/45 {
    foreach stem in crimes $crimeclass $crimetypes {
        gen any`stem'_post`m' = tot`stem'_post`m'>0 if tot`stem'_post`m'<.
    }
}


* Monthly assault rates: pre10 down to pre1, then post0-post9 and post10-post19.
local show
forvalues m = 10(-1)1 {
    local show `show' anyassault_pre`m'
}
sum `show'

forvalues first = 0(10)10 {
    local last = `first' + 9
    local show
    forvalues m = `first'/`last' {
        local show `show' anyassault_post`m'
    }
    sum `show'
}




************************  Additional data cleaning / merging

**** additional outcomes (lbw, preterm, low apgar)
* Every indicator is 0/1 and is left missing when its source variable is missing.

gen lbw = (birthweight < 2500) if !missing(birthweight)
label var lbw "Low Birth Weight"

gen vlbw = (birthweight < 1500) if !missing(birthweight)
label var vlbw "Very Low Birth Weight"

gen hbw = (birthweight > 4000) if !missing(birthweight)
label var hbw "High Birth Weight"

gen pret = (gest_weeks < 37) if !missing(gest_weeks)
label var pret "Pre-Term Birth"

gen vpret = (gest_weeks < 34) if !missing(gest_weeks)
label var vpret "Very Pre-Term Birth"

* Apgar score below 9 at 1 and at 5 minutes
foreach k in 1 5 {
    gen apgar`k'_low = (apgar`k' < 9) if !missing(apgar`k')
    label var apgar`k'_low "`k'-Min Apgar less than 9"
}

gen wgtgain_low = (wgtgain < 15) if !missing(wgtgain)
label var wgtgain_low "Wgt Gain less than 15lbs"

gen wgtgain_high = (wgtgain > 40) if !missing(wgtgain)
label var wgtgain_high "Wgt Gain more than 40lbs"

* Either flag equal to 1; defined only when both source flags are non-missing.
gen any_breastfeed = inlist(1, excl_breastfeed, some_breastfeed) if !missing(excl_breastfeed, some_breastfeed)
label var any_breastfeed "Any Breastfeeding"

gen any_abncong = inlist(1, any_abnormal, any_congen) if !missing(any_abnormal, any_congen)
label var any_abncong "Any Abnormal or Congenital Conditions"


**** parity (number of previous live births), with a separate missing flag
gen parity1 = (prevlivebirths == 0)
gen parity2 = (prevlivebirths == 1)
gen parity3plus = (prevlivebirths < . & prevlivebirths >= 2)
gen paritymiss = (prevlivebirths == .)


*** mom / dad foreign: flag missing values, then recode them to 0
foreach parent in mom dad {
    gen `parent'_foreignmiss = (`parent'_foreign == .)
    replace `parent'_foreign = 0 if `parent'_foreign == .
}

**** father info missing: age, education, race and foreign status all flagged missing
gen dadmiss = (dad_foreignmiss==1 & dadmissingrace==1 & dadeducmiss==1 & dadagemiss==1)
tab dadmiss
label var dadmiss "Father Info Missing"

**** Mom and dad education (two categories each: low = educ1/educ2, high = educ3/educ4)
foreach parent in mom dad {
    gen `parent'edlow = (`parent'educ1==1 | `parent'educ2==1)
    gen `parent'edhigh = (`parent'educ3==1 | `parent'educ4==1)
}

* Zeros in these two property variables are recoded to missing.
foreach v in assesstot year_built {
    replace `v' = . if `v' == 0
}

global crimetypes arson assault f_assault burglary drugs kidnap fraud larceny harassment homicide dui weapons other

/***** BBQ SAMPLE: Bronx, Brooklyn, and Queens only ****/
* Keeps records whose mom_borough code is 2, 3 or 4.
destring mom_borough, replace
gen BBQ = inlist(mom_borough, 2, 3, 4)

keep if BBQ==1


***** how many women have multiple assaults during pregnancy in the multi-family sample?
summarize totassault_preg if house==0, detail
count if house==0 & totassault_preg<. & totassault_preg>1


******* scale by number of units in the building
destring unitsres, replace
summarize unitsres, detail

* Assaults during pregnancy divided by the building's residential units.
* (Other candidates considered in the original loop but left switched off:
*  anyassault_preg anyf_assault_preg anyassault_tri1 anyassault_tri2 anyassault_tri3)
gen prob_totassault_preg = totassault_preg/unitsres

* Compare house==1 records with house==0 records.
foreach h in 1 0 {
    sum prob_totassault_preg if house==`h'
}

label var prob_totassault_preg "Prob. of Assault During Pregnancy"

/******************** DEFINING GLOBALS WITH OUTCOMES AND CALCULATING INDICES ***********************************/

** NOTE: outcomes are oriented such that a higher value means: worse infant health, more medical services, worse behaviors

** Some outcomes need to be reoriented: birth weight, gestation length, cmale, wic.
** Continuous outcomes are flipped by subtracting them from their sample maximum.
foreach y of varlist birthweight gest_weeks {
    egen max_`y' = max(`y') if `y'!=.
    gen inv_`y' = max_`y' - `y'
}

gen cfem = 1 - cmale
gen nowic = 1 - wic


** z-scores: each outcome is standardised with the mean and SD taken over
** observations with anyassault_post==1 (the comparison group).
local zlist inv_birthweight lbw vlbw hbw inv_gest_weeks pret vpret apgar1_low any_abncong nicu cfem child_death firsttri_pnc numvisits nowic momsmoke momdruguse mom_depressed wgtgain_low wgtgain_high csection induced anycompl
foreach y of varlist `zlist' {

    * comparison-group mean, then copied to every row
    egen m_`y' = mean(`y') if anyassault_post==1
    summarize m_`y'
    egen CM_`y' = mean(m_`y')
    summarize CM_`y'

    * comparison-group SD, then copied to every row
    egen sd_`y' = sd(`y') if anyassault_post==1
    summarize sd_`y'
    egen CSD_`y' = mean(sd_`y')
    summarize CSD_`y'

    gen Z_`y' = (`y' - CM_`y') / CSD_`y'
}

summarize Z_inv_birthweight
summarize Z_vlbw
summarize Z_nowic

** create indices: each index is the row mean of its components' z-scores
** note: appendix_index also includes the components of birthout_index (so, it's including all possible birth outcomes)
local parts_birthout vlbw vpret apgar1_low nicu any_abncong child_death
local parts_medical firsttri_pnc numvisits induced csection anycompl
local parts_behavioral momsmoke momdruguse mom_depressed wgtgain_low wgtgain_high nowic
local parts_appendix `parts_birthout' inv_birthweight lbw inv_gest_weeks pret

foreach idx in birthout medical behavioral appendix {
    local zparts
    foreach p of local parts_`idx' {
        local zparts `zparts' Z_`p'
    }
    egen `idx'_index = rowmean(`zparts')
}


** outcome lists by family
global birthout vlbw vpret apgar1_low nicu any_abncong child_death birthout_index appendix_index
global medical firsttri_pnc numvisits induced csection anycompl medical_index
global behavioral momsmoke momdruguse mom_depressed wgtgain_low wgtgain_high nowic behavioral_index
global appendix birthweight lbw gest_weeks pret appendix_index


*** index global
global indices birthout_index appendix_index medical_index behavioral_index

* Maternal characteristics list
global momchars momage mom_married mom_foreign momnonhispanicwhite momhispanic momnonhispanicblack momedlow momedhigh anyriskfactors parity1 singleton


* Mother-only control set, kept for reference (not in use):
*global controls momage1 momage2 momage3 momage4 momagemiss mom_married mom_foreign momnonhispanicwhite momhispanic momnonhispanicblack momotherrace mommissingrace momeduc1 momeduc2 momeduc3 momeduc4 momeducmiss singleton parity1 parity2 parity3plus paritymiss

* Regression controls in use: mother and father age, nativity, race/ethnicity
* and education dummies (with their missing flags), marital status, singleton
* and parity.
global controls momage1 momage2 momage3 momage4 momagemiss dadage1 dadage2 dadage3 dadage4 dadagemiss ///
    mom_married mom_foreign dad_foreign momnonhispanicwhite momhispanic momnonhispanicblack momotherrace mommissingrace dadnonhispanicwhite dadhispanic dadnonhispanicblack dadotherrace dadmissingrace ///
    momeduc1 momeduc2 momeduc3 momeduc4 momeducmiss dadeduc1 dadeduc2 dadeduc3 dadeduc4 dadeducmiss singleton parity1 parity2 parity3plus paritymiss




*********************************************
/********* REWEIGHTING REGRESSIONS BASED ON SINGLE-FAMILY CHARACTERISTICS ***************/

*** For text in Appendix E about re-weighting multi-family sample to be more representative of single-family sample

* Borough dummies (d_borough1, d_borough2, ...) used to define the bins.
tab mom_borough, gen(d_borough)

global chars d_borough1 d_borough2 d_borough3 momage1 momage2 momage3 momage4 mom_married mom_foreign momnonhispanicwhite momhispanic momnonhispanicblack momotherrace momeduc1 momeduc2 momeduc3 momeduc4 singleton parity1 parity2 parity3plus 

* One bin per distinct combination of the characteristics in $chars.
* egen group() leaves the bin missing when any characteristic is missing.
egen binned_groups = group($chars)

* Number of births in each bin-by-house cell. For a house==0 row this is the
* multi-family count of its bin.
bys binned_groups house: gen N_subgroup = _N

* Number of house==1 (single-family) births in the same bin, copied to every
* row of the bin. This replaces the original bin-by-bin loop, which ran four
* unquieted -summarize- calls per bin to recover the same cell counts.
bys binned_groups: egen N_single_bin = total(house==1)

* Sample size by house type (kept for reference; it cancels out of the weights).
bys house: gen N_sample = _N

* "True" proportion: the multi-family bin share relative to itself, which is
* identically 1, so true_pwgt weights every multi-family birth equally.
* NOTE(review): the original expression was (n2/N2)/(n2/N2); confirm that a
* constant 1 is the intended baseline.
gen true_proportion = 1 if house==0 & !missing(binned_groups)

* Modified proportion: (multi-family count in bin) / (single-family count in bin),
* so that 1/modified_proportion gives each bin in the multi-family sample the
* same weight it receives in the single-family sample. It is left missing for
* bins with no single-family births.
gen modified_proportion = true_proportion * (N_subgroup/N_single_bin) if house==0 & !missing(binned_groups) & N_single_bin>0

gen true_pwgt = 1 / true_proportion
gen pwgt = 1 / modified_proportion

* Index regressions on the multi-family sample (house==0), restricted to births
* with an assault during pregnancy or in the post window. They are run twice:
*   true_pwgt -> output file suffix "truepw"
*   pwgt      -> output file suffix "pweight" (re-weighted to the single-family mix)
*
* -areg- is used because absorb() is an option of -areg-, not of -regress-.
* The sample condition uses ordinary parentheses rather than square brackets.
local wvars true_pwgt pwgt
local wtags truepw pweight

forvalues k = 1/2 {
    local w : word `k' of `wvars'
    local tag : word `k' of `wtags'

    eststo clear
    foreach out in $indices {
        eststo: areg `out' prob_totassault_preg $controls i.concep_year i.concep_month ///
            if house==0 & (anyassault_preg==1 | anyassault_post==1) [pw=`w'], absorb(mom_borough) vce(robust)
        * dependent-variable mean over the estimation sample (feeds the estadd below if re-enabled)
        qui sum `e(depvar)' if e(sample)==1
        *estadd scalar dv=`r(mean)'
    }

    esttab using "${results}indices_Ptotassault_pregBBQ_`tag'.tex", replace fragment booktabs keep(prob_totassault_preg) ///
        se star(* .1 ** .05 *** .01)  stats(N,  label("Indiv. obs.")) ///
        mtitles("Birth Out" "Broad Birth Out" "Med Serv" "Behav/Well" ) ///
        wrap label varwidth(20) brackets
}
****************************************************************************************************************************************

/***************************************************************************************************************************
***********************REGRESSIONS: ASSAULT DURING PREGNANCY VS. 10 MONTHS AFTER*********************************************
****************************************************************************************************************************/


**** APPENDIX TABLE A10, Panel B

*** MULTI-FAMILY SAMPLE ONLY:
keep if house==0

**** BY CRIME TYPE:
* Only assault is run here; prob_tot<type>_preg must already exist for every
* type listed in this global.
global crimetypes assault


foreach type in $crimetypes {

    eststo clear
    foreach out in $indices {
        * Sample: births with this crime type during pregnancy or in the post window.
        * -areg- is used because absorb() is an option of -areg-, not of -regress-.
        eststo: areg `out' prob_tot`type'_preg $controls i.concep_year i.concep_month ///
            if any`type'_preg==1 | any`type'_post==1, absorb(mom_borough) vce(robust)
        * dependent-variable mean over the estimation sample (feeds the estadd below if re-enabled)
        qui sum `e(depvar)' if e(sample)==1
        *estadd scalar dv=`r(mean)'
    }

    esttab using "${results}indices_Ptot`type'_pregBBQ.tex", replace fragment booktabs keep(prob_tot`type'_preg) ///
        se star(* .1 ** .05 *** .01)  stats(N,  label("Indiv. obs.")) ///
        mtitles("Birth Out" "Broad Birth Out" "Med Serv" "Behav/Well" ) ///
        wrap label varwidth(20) brackets
}






